# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import Request
from lxml import etree
import urllib.parse

class BaidusSpider(scrapy.Spider):
    """Crawl a Baidu search-results page and recursively follow the
    "related searches" keyword links found on each page.

    Starting from the seed query in ``start_urls``, each parsed page
    yields follow-up requests for every related keyword until the
    ``end_pages`` budget is reached.
    """

    name = "baidus"
    allowed_domains = ["www.baidu.com"]
    # Seed query; the wd parameter is the URL-encoded keyword "隔音窗".
    start_urls = ['https://www.baidu.com/s?wd=%E9%9A%94%E9%9F%B3%E7%AA%97']
    pages = 0      # number of result pages parsed so far
    end_pages = 2  # page budget: stop scheduling new requests at this count
    stop_page = 4  # unused; kept so any external reference keeps working

    def parse(self, response):
        """Parse one results page: log progress, extract the related-search
        keywords, and yield a follow-up Request for each keyword.

        NOTE(review): the counter is incremented before the ``<`` check, so
        only ``end_pages - 1`` pages are actually processed and the requests
        already in flight when the budget runs out are fetched but dropped —
        confirm whether ``<=`` was intended.
        """
        self.pages = self.pages + 1

        if self.pages < self.end_pages:
            print(self.start_urls[0] +"--"+ str(self.pages),"这是个测试代码,===后面是页面增加测试",self.pages)
            # Parse the raw page with lxml rather than response.xpath.
            html = etree.HTML(response.body)
            # Related-search keywords live in the #rs block.
            # NOTE(review): this path requires a literal <tbody> element —
            # verify the raw (non-browser-normalized) markup contains one,
            # otherwise the list is always empty.
            xianguan = html.xpath("//div[@id='rs']/table/tbody/tr/th/a/text()")
            self.getKeywords(xianguan)
            for linkwd in xianguan:
                # Build a new search URL for the related keyword and recurse.
                wd_link = "https://www.baidu.com/s?wd="+urllib.parse.quote(linkwd)
                print("正在执行二次抓取页面",wd_link)
                yield Request(url=wd_link, callback=self.parse)

    def getKeywords(self, arr):
        """Print each extracted keyword; silently accept a falsy/empty arr."""
        if arr:
            for wd in arr:
                print(wd)
